<?php
/**
 * 爬取内容检测
 * User 1874
 */

namespace app\index\controller;
use app\BaseController;

class Crawling extends BaseController {
    /**
     * Columns to check (24 entries).
     *
     * Each entry holds:
     *  - url:      listing page to fetch
     *  - checkDay: number of days after the newest item's date by which the column should be updated again
     *  - name:     column name used in the report output
     *  - key:      tag that carries class="date" on that page ("span" or "td")
     */
    private $check = [
        ["url"=>"http://kzj.luzhou.gov.cn/sjdt", "checkDay"=>14, "name"=>"市局动态", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/tzgg", "checkDay"=>180, "name"=>"通知公告", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/xqkj", "checkDay"=>30, "name"=>"区县科技", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/zwgk/lzyj/bmwj/zcjd", "checkDay"=>180, "name"=>"政策解读", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/zwgk/lzyj/bmwj/zcwj", "checkDay"=>180, "name"=>"政策文件", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/zl/djgz", "checkDay"=>30, "name"=>"专栏-党建动态", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/zl/yfzlzl", "checkDay"=>30, "name"=>"专栏-依法治理专栏", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/zl/cwgkzl", "checkDay"=>180, "name"=>"专栏-人事财务专栏", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/zcfg/sjkjcxzc", "checkDay"=>180, "name"=>"政策法规-泸州科技创新政策", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/zcfg/sckjcxzc", "checkDay"=>180, "name"=>"政策法规-四川科技创新政策", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/zcfg/gjkjcxzc", "checkDay"=>180, "name"=>"政策法规-国家科技创新政策", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/kjjh/kjjfglzcfg", "checkDay"=>180, "name"=>"主营业务-科技计划-科技经费管理政策法规", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/kjjh/kjxmglzcfg", "checkDay"=>180, "name"=>"主营业务-科技计划-科技项目管理政策法规", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/kjjh/kjjhgl", "checkDay"=>30, "name"=>"主营业务-科技计划-科技计划管理", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/gykj", "checkDay"=>30, "name"=>"主营业务-工业科技", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/nckjykxpj/nckj", "checkDay"=>30, "name"=>"主营业务-农村科技与科学普及-农村科技", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/nckjykxpj/fpzxpt", "checkDay"=>30, "name"=>"主营业务-农村科技与科学普及-科技扶贫", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/nckjykxpj/kxpjykjjl", "checkDay"=>30, "name"=>"主营业务-农村科技与科学普及-科学普及与科技奖励", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/kjcg/kjcgzh", "checkDay"=>30, "name"=>"主营业务-成果转化与区域创新-科技成果转化", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/kjhz", "checkDay"=>30, "name"=>"主营业务-外专引智与科技合作", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/bsdt/ghyzcfg", "checkDay"=>30, "name"=>"主营业务-规划与政策法规", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/xgpt/fzzf/hzzfsxzdsdzl", "checkDay"=>30, "name"=>"相关平台-法治政府建设专栏-行政执法三项制度专栏", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/xgpt/fzzf/wwaaaq", "checkDay"=>30, "name"=>"相关平台-法治政府建设专栏-行政执法公示", "key"=>"span"],
        ["url"=>"http://kzj.luzhou.gov.cn/letterbox/bmxx/list", "checkDay"=>60, "name"=>"公众参与-部门信箱", "key"=>"td"]
    ];
    /**
     * Check when each configured page was last updated.
     *
     * For every entry in $check: fetch the page, take the first element with
     * class="date", add the entry's checkDay to that date, and dump whether
     * today has reached that deadline. Dumps a summary with the number of
     * columns whose date was successfully evaluated, then terminates the script.
     */
    public function checkWebsiteDate(){
        $num = 0;
        $toDay = date("Y-m-d");     // today's date
        foreach ($this->check as $value){
            $html = $this->getCurlHtml($value["url"]);       // fetch the page HTML ("" on failure)
            // Pattern for the tag that carries the date on this page
            $reg = $value["key"] === "span" ? "/<span class=\"date\">(.*?)<\/span>/i" : "/<td class=\"date\">(.*?)<\/td>/i";
            // Only the first class="date" element is inspected
            if(preg_match($reg, $html, $info) === 1){
                $latest = trim($info[1]);
                $time = strtotime($latest." +{$value["checkDay"]} days");  // timestamp by which the column should be updated
                if($time === false){
                    // strtotime() could not parse the captured text; without this guard
                    // date() would receive false and report a bogus 1970-01-01 deadline.
                    dump("{$value["name"]}，栏目日期无法解析：{$latest}，链接：{$value["url"]}");
                }else{
                    $updateDay = date("Y-m-d", $time);    // date by which the column should be updated
                    // Y-m-d strings order lexicographically the same as chronologically
                    if($toDay >= $updateDay){
                        dump("{$value["name"]}，已过更新时间，栏目最新日期：{$latest}，链接：{$value["url"]}");
                    }else{
                        dump("{$value["name"]}，未到更新时间，栏目最新日期：{$latest}");
                    }
                    $num++;
                }
            }else{
                dump("{$value["name"]}，未获取到内容");
            }
            sleep(2);   // pause between requests
        }
        dump("检测完毕，已检测栏目{$num}条");
        exit();
    }
    /**
     * Fetch a page's HTML with cURL.
     *
     * @param string $url page to request
     * @return string response body, or an empty string when the handle could
     *                not be created or the request failed
     */
    private function getCurlHtml(string $url): string {
        $ch = curl_init();
        if($ch === false){
            return "";
        }
        $timeout = 30; // seconds allowed for establishing the connection
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11");
        // NOTE(review): 333 is not a valid IPv4 octet; confirm whether these headers are still wanted.
        curl_setopt($ch, CURLOPT_HTTPHEADER, ['X-FORWARDED-FOR:111.222.333.4', 'CLIENT-IP:111.222.333.4']);
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        // Cap the whole transfer as well, so a server that stalls after connecting cannot hang the run
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout * 2);
        $html = curl_exec($ch);
        // curl_exec() returns false on failure; normalise so callers always get a string
        return is_string($html) ? $html : "";
    }
}